# ========================================================================= #
# Project: Lexical Ambiguity in Political Rhetoric (BJPolS)
# - Script: Compare average usage of moral foundations
# - Author: Patrick Kraft (patrickwilli.kraft@uc3m.es)
# ========================================================================= #


# Load packages and custom functions --------------------------------------

source(here::here("code/00-func.R"))


# Load dictionaries & speeches --------------------------------------------

## Restore the saved workspace objects from out/speeches.Rdata.
## The sections below read `sotu`, `qs`, `co`, `pl`, `deb` and `email`.
## NOTE(review): these (and `dict_unlist`, used in the last section) are
## presumably provided by this file or by code/00-func.R -- confirm.
load(here("out/speeches.Rdata"))


# Corpus overview ---------------------------------------------------------

## Overview table for the four main speech collections: number of documents
## and mean token count (punctuation removed), written to a LaTeX file.
tmp <- list(sotu, qs, co, pl)
tibble(
  `Speech Type` = c(
    "US State of the Union", "UK Queen's Speeches",
    "US Convention Speeches", "UK Party Leader Speeches"
  ),
  `Number of Documents` = map_dbl(tmp, function(speeches) {
    ndoc(corpus(speeches))
  }),
  `Average Word Count` = map_dbl(tmp, function(speeches) {
    mean(ntoken(corpus(speeches), remove_punct = TRUE))
  })
) %>%
  xtable(
    caption = "Overview of Speeches",
    label = "tab:overview",
    align = "llcc",
    digits = 0
  ) %>%
  print(
    file = here("out/tab01-overview.tex"),
    include.rownames = FALSE,
    comment = FALSE
  )
## the list of collections is only needed for this table
rm(tmp)


# Extract dictionary counts -----------------------------------------------

## Apply mftPercent() (a project helper, not defined in this script) to each
## collection; the panel letters match the figure titles used further down.
sotu_data  <- mftPercent(sotu)   # a) US State of the Union
qs_data    <- mftPercent(qs)     # b) UK Queen's Speeches
co_data    <- mftPercent(co)     # c) US Convention Speeches
pl_data    <- mftPercent(pl)     # d) UK Party Leader Speeches
deb_data   <- mftPercent(deb)    # e) US Presidential Debates
email_data <- mftPercent(email)  # f) US Senate Emails


# Plot averages by party --------------------------------------------------

## Fix the RNG state. The bootstrap calls below consume random numbers in
## sequence, so the order of the six plots must stay as it is to reproduce
## the same resamples.
set.seed(42)

## Helper: average share of each moral foundation by party, with 95%
## percentile intervals from a party-stratified bootstrap, drawn as a
## horizontal dot-and-whisker plot.
## - data:  per-document percentages (one of the *_data objects above)
## - ...:   label arguments forwarded to labs(), e.g. title = , caption =
## - ymax:  upper limit of the percent axis
## - times: number of bootstrap resamples
## NOTE(review): relies on mftBoot() returning `party`, `foundation` and
## `percent` for every resample -- confirm in code/00-func.R.
plot_mft_avg <- function(data, ..., ymax = 1.6, times = 100) {
  data %>%
    bootstraps(times = times, strata = party) %>%
    mutate(splits = map(splits, mftBoot)) %>%
    unnest(cols = splits) %>%
    group_by(party, foundation) %>%
    summarize(average = mean(percent),
              cilo = quantile(percent, probs = .025),
              cihi = quantile(percent, probs = .975)) %>%
    ggplot(aes(x = foundation, y = average,
               ymin = cilo, ymax = cihi,
               col = party, shape = party)) +
    ## points and intervals share the same dodge so they stay aligned
    geom_point(size = 1.5, position = position_dodge(width = -0.3)) +
    geom_errorbar(width = 0, position = position_dodge(width = -0.3)) +
    scale_color_brewer(palette = "Dark2") +
    labs(..., y = "Percent", x = NULL) +
    ylim(0, ymax) + coord_flip() +
    theme_mft()
}

## a) US State of the Union
p_sotu <- plot_mft_avg(
  sotu_data,
  title = paste0("a) US State of the Union (N = ", nrow(sotu_data), ")")
)

## b) UK Queen's Speeches
p_qs <- plot_mft_avg(
  qs_data,
  title = paste0("b) UK Queen's Speeches (N = ", nrow(qs_data), ")")
)

## c) US Convention Speeches
p_co <- plot_mft_avg(
  co_data,
  title = paste0("c) US Convention Speeches (N = ", nrow(co_data), ")")
)

## d) UK Party Leader Speeches
p_pl <- plot_mft_avg(
  pl_data,
  title = paste0("d) UK Party Leader Speeches (N = ", nrow(pl_data), ")")
)

## e) US Presidential Debates (stand-alone figure: generic title, N in caption)
p_deb <- plot_mft_avg(
  deb_data,
  title = "Average Percentage of Moral Terms",
  caption = paste0("Total number of documents = ", nrow(deb_data))
)

## f) US Senate Emails (stand-alone figure; wider percent axis)
p_email <- plot_mft_avg(
  email_data,
  title = "Average Percentage of Moral Terms",
  caption = paste0("Total number of documents = ", nrow(email_data)),
  ymax = 2.5
)

## Combine plots
## arrangeGrob() only builds the two-column layout; unlike grid.arrange() it
## does not also draw it on the active graphics device, so nothing is
## rendered apart from the saved file.
ggsave(here("out/fig01-mft_avg.png"),
       arrangeGrob(p_sotu, p_co, p_qs, p_pl, ncol = 2),
       height = 5, width = 6, dpi = 600)

## Debates and emails are saved as separate single-panel figures
ggsave(here("out/fig04a-mft_avg_debates.png"), p_deb, height = 2.5, width = 3, dpi = 600)
ggsave(here("out/fig05a-mft_avg_emails.png"), p_email, height = 2.5, width = 3, dpi = 600)

## Bootstrap test of the party difference (Democratic - Republican) in the
## US debates. The summary has one row per foundation; the care row is the
## one of interest.
deb_data %>%
  bootstraps(250, strata = party) %>%
  mutate(splits = map(splits, mftBoot)) %>%
  unnest(cols = splits) %>%
  ## one row per resample and foundation, one column per party
  pivot_wider(names_from = "party", values_from = "percent") %>%
  mutate(diff = Democratic - Republican) %>%
  group_by(foundation) %>%
  summarize(mean = mean(diff),
            ## two-sided p-value: twice the smaller tail share, capped at 1.
            ## Doubling only the share of negative differences would give
            ## values above 1 whenever the Republican share is the larger.
            pval = min(1, 2 * min(mean(diff < 0), mean(diff > 0))))


# Plot variation over time ------------------------------------------------

## US metadata: party in govt
## Builds a table of the years at which the party coding changes; the
## time-series plots below use it to draw shaded bands via geom_rect().
## - `switch` is the row-to-row change in the numeric party code; the first
##   row is hard-coded to -1, so it always counts as a change.
## - rows without a change (switch == 0) are dropped, the rest are split by
##   the value of `switch`, and the `year` column of each piece is bound
##   side by side. The plots refer to the result as `year...1` / `year...2`.
## NOTE(review): this assumes `party` is a two-level factor, rows are sorted
## by year, and both switch directions occur equally often (otherwise the
## column-bind cannot line up) -- confirm against the data.
sotu_govt <- sotu_data %>% 
  select(year, party) %>%
  mutate(switch = c(-1,diff(as.numeric(party)))) %>%
  filter(switch != 0) %>%
  split(.$switch) %>%
  map_dfc(~select(., year))

## UK metadata: party in govt
## Same construction as for the US data above, applied to the Queen's
## Speeches; the same assumptions apply.
## NOTE(review): because the first row is forced to -1, the first band starts
## in the first observed year regardless of which party governed then --
## verify that this matches the intended shading.
qs_govt <- qs_data %>%
  select(year, party) %>%
  mutate(switch = c(-1,diff(as.numeric(party)))) %>%
  filter(switch != 0) %>%
  split(.$switch) %>%
  map_dfc(~select(., year))

## Helper: yearly average share of each moral foundation in one collection,
## drawn as one line per foundation.
## - data:        per-document percentages (one of the *_data objects); every
##                column other than party, year, speaker and type is treated
##                as a foundation share
## - title:       panel title
## - govt:        optional table with columns year...1 / year...2 (see
##                sotu_govt, qs_govt); each row is drawn as a shaded band
##                behind the lines
## - year_limits: limits of the year axis
## - show_legend: TRUE puts the legend in the bottom-left corner of the
##                panel, FALSE hides it
plot_mft_time <- function(data, title, govt = NULL,
                          year_limits = c(1932, 2019),
                          show_legend = FALSE) {
  p <- data %>%
    pivot_longer(cols = -c(party, year, speaker, type),
                 names_to = "foundation", values_to = "percent") %>%
    mutate(foundation = factor(foundation,
                               levels = c("Care", "Fairness", "Loyalty",
                                          "Authority", "Sanctity"))) %>%
    ## average documents from the same year so that every year contributes
    ## exactly one point per line (applied to all collections alike)
    group_by(year, foundation) %>%
    summarize(percent = mean(percent)) %>%
    ggplot(aes(x = year, y = percent, col = foundation, lty = foundation)) +
    labs(title = title, x = NULL, y = "Percent") +
    scale_x_continuous(expand = c(0, 0), limits = year_limits) +
    scale_y_continuous(expand = c(0, 0), limits = c(0, 2.5)) +
    scale_colour_brewer(palette = "Dark2")

  ## bands are added before the lines so that the lines are drawn on top
  if (!is.null(govt)) {
    p <- p +
      geom_rect(data = govt, aes(xmin = year...1, xmax = year...2,
                                 ymin = -Inf, ymax = Inf),
                inherit.aes = FALSE, alpha = .3)
  }

  p <- p + geom_line() + theme_mft()

  if (show_legend) {
    p + theme(legend.position = c(0, 0),
              legend.justification = c(0, 0))
  } else {
    p + theme(legend.position = "none")
  }
}

## a) US State of the Union
p_sotu <- plot_mft_time(
  sotu_data,
  title = paste0("a) US State of the Union (N = ", nrow(sotu_data), ")"),
  govt = sotu_govt
)

## b) UK Queen's Speeches
p_qs <- plot_mft_time(
  qs_data,
  title = paste0("b) UK Queen's Speeches (N = ", nrow(qs_data), ")"),
  govt = qs_govt
)

## c) US Convention Speeches
p_co <- plot_mft_time(
  co_data,
  title = paste0("c) US Convention Speeches (N = ", nrow(co_data), ")")
)

## d) UK Party Leader Speeches
p_pl <- plot_mft_time(
  pl_data,
  title = paste0("d) UK Party Leader Speeches (N = ", nrow(pl_data), ")"),
  show_legend = TRUE
)

## e) US Presidential Debates
p_deb <- plot_mft_time(
  deb_data,
  title = paste0("e) US Presidential Debates (N = ", nrow(deb_data), ")"),
  show_legend = TRUE
)

## f) US Senate Emails (shorter year axis)
p_email <- plot_mft_time(
  email_data,
  title = paste0("f) US Senate Emails (N = ", nrow(email_data), ")"),
  year_limits = c(2010, 2020),
  show_legend = TRUE
)

## Combine plots
## arrangeGrob() only builds the single-column layout; unlike grid.arrange()
## it does not also draw it on the active graphics device, so nothing is
## rendered apart from the saved file.
ggsave(here("out/appB1-mft_time.png"),
       arrangeGrob(p_sotu, p_qs, p_co, p_pl, ncol = 1),
       height = 6, width = 8, dpi = 600)


# Most common MFT terms across foundations --------------------------------

## Stack the speech collections (party leader speeches without the "Liberal"
## party) and find the most frequent dictionary terms per type and party.
df <- list(sotu, qs, co, pl %>% filter(party != "Liberal"), deb) %>%
  bind_rows() %>%
  mutate(text = tolower(text)) %>%
  ## ndoc: number of documents per type/party, counted before tokenising
  add_count(type, party, name = "ndoc") %>%
  unnest_tokens(output = word, input = text) %>%
  ## keep dictionary terms only
  filter(word %in% dict_unlist) %>%
  group_by(type, party, ndoc) %>%
  count(word) %>%
  ## perdoc: mentions of the term per document
  mutate(perdoc = n / ndoc) %>%
  ## the grouping is still in place, so this is the top 10 per type/party
  top_n(n = 10, wt = perdoc)

## Keep only the terms that occur more than once per speech type in `df`
## (i.e. for more than one party), then plot mentions per speech by party.
df %>%
  right_join(
    ## type/word pairs with more than one row in `df`
    df %>%
      group_by(type) %>%
      count(word, wt = n()) %>%
      filter(n != 1) %>%
      select(-n)
  ) %>%
  ungroup() %>%
  mutate(
    ## reversed level order, so the flipped axis reads top to bottom
    word = factor(word, levels = rev(levels(factor(word)))),
    ## pool the US and UK party labels into two categories
    party = recode_factor(
      party,
      Republican = " Republican / Conservative",
      Conservative = " Republican / Conservative",
      Democratic = " Democratic / Labour",
      Labour = " Democratic / Labour"
    ),
    type = factor(
      type,
      levels = c(
        "a) US State of the Union",
        "c) US Convention Speeches",
        "e) US Presidential Debates",
        "b) UK Queen's Speeches",
        "d) UK Party Leader Speeches"
      )
    )
  ) %>%
  ## debates and emails are not part of this figure
  filter(type != "e) US Presidential Debates" &
           type != "f) US Senate Emails") %>%
  ggplot(aes(x = word, y = perdoc, fill = party)) +
  geom_col(position = "dodge") +
  facet_wrap(~type, scales = "free_y", ncol = 2) +
  coord_flip() +
  labs(x = NULL, y = "Average Number of Mentions per Speech") +
  scale_fill_brewer(
    palette = "Dark2",
    guide = guide_legend(reverse = TRUE)
  ) +
  theme_mft()

## no plot is passed, so ggsave() writes the most recent plot (the one above)
ggsave(
  filename = here("out/appC1-mft_terms.png"),
  width = 6, height = 5, dpi = 600
)
